In [1]:
#For writing and reading data into MongoDb as Collections from json files and carrying out various operations
import json
from pymongo import MongoClient
import pandas as pd
import numpy as np
import pprint

#For writing and reading data into PostgreSql
import psycopg2 as pg
import sqlalchemy as sqla

#For combinig all dataframes into one dataframe 
from functools import reduce as rd

# For Visualisations
%matplotlib inline
import math
import seaborn as sns
import matplotlib.pyplot as plt
import random
import plotly.express as px
import plotly as py
import plotly.graph_objs as go

Part 1 - Reading and Writing to MongoDb and Data Pre-processing

We have Data for PES Game players into 4 datasets namely - Player Details, Player Positions, Attacker & Midfielder attributes, and Defender and Goalkeeper attributes.

Dataset worked on by

Player Details - Pooja Rakate;

Player Positions - Mayuresh Londhe;

Attacker & Midfielder attributes (AM Attr) - Ankit Rungta;

Defender and Goalkeeper attributes (DG Attr) - Priyanka Chimthankar

In [2]:
##Establishing connection with MongoDb
#Port number: 27017

client = MongoClient('localhost', 27017)

#Database Name : PES_Players_20
mydb = client['PES_Players_20']

##Loading json files as collections into PES_Players_20 in MongoDb
try:
    #Added by Pooja Rakate
    #Creating a collection Player_Details by loading json file
    Player_Details = mydb.Player_Details
    with open('D:/Pooja_NCI/1_Semester/DataBase and Analytical programming/Project/Datasets/PES 2020/Player_Details.json', encoding = 'utf-8') as f:
        file_data = json.load(f)
        
    #Insert all the records from json file as documents in the collection
    Player_Details.insert_many(file_data)

    ##Added by Mayuresh Londhe
    #Creating a collection Player_Position_Ranking by loading json file
    PPRANK = mydb['Player_Position_Ranking']
    with open('D:/Pooja_NCI/1_Semester/DataBase and Analytical programming/Project/Code from everyone/Player_positions.json',encoding='utf-8') as f:
        file_data1 = json.load(f)

    #Insert all the records from json file as documents in the collection
    PPRANK.insert_many(file_data1)

    ##Added by Ankit Rungta
    #Creating a collection AM_Player_attributes by loading json file
    AM_Attributes = mydb['AM_Player_attributes']
    with open('D:/Pooja_NCI/1_Semester/DataBase and Analytical programming/Project/Code from everyone/AM_Player_attributes.json',encoding="utf-8") as f2:
        file_data2 = json.load(f2)
    
     #Insert all the records from json file as documents in the collection
    AM_Attributes.insert_many(file_data2)

    ##Added by Priyanka Chimthankar
    #Creating a collection DG_Players_Attributes by loading json file
    DG_Attributes = mydb['DG_Players_Attributes']
    with open('D:/Pooja_NCI/1_Semester/DataBase and Analytical programming/Project/Code from everyone/DG_Player_attributes.json',encoding='utf-8') as f3:
        file_data3 = json.load(f3)
    
     #Insert all the records from json file as documents in the collection
    DG_Attributes.insert_many(file_data3)
    
except:
    print('Table not loaded into MongoDB !!')
Out[2]:
<pymongo.results.InsertManyResult at 0x1b1d2a7bfc0>
Start of code by Pooja Rakate for Data pre-processing of Player Details data
In [178]:
##Checking if collection is created properly in MongoDb by accessing one document from it
Player_Details.find_one()
Out[178]:
{'_id': ObjectId('5fe9eab37119e417c4609ffa'),
 'Player_id': 158023,
 'short_name': 'L. Messi',
 'long_name': 'Lionel Andrés Messi Cuccittini',
 'player_url': 'https://sofifa.com/player/158023/lionel-messi/20/159586',
 'age': 32,
 'dob': '24-06-1987',
 'height_cm': 170,
 'weight_kg': 72,
 'nationality': 'Argentina',
 'club': 'FC Barcelona',
 'overall': 94,
 'potential': 94,
 'value_eur': 95500000,
 'wage_eur': 565000,
 'player_positions': 'RW, CF, ST',
 'preferred_foot': 'Left',
 'international_reputation': 5,
 'weak_foot': 4,
 'skill_moves': 4,
 'work_rate': 'Medium/Low',
 'body_type': 'Messi',
 'real_face': 'Yes',
 'release_clause_eur': 195800000,
 'player_tags': '#Dribbler, #Distance Shooter, #Crosser, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward',
 'team_position': 'RW',
 'team_jersey_number': 10,
 'loaned_from': '',
 'joined': '01-07-2004',
 'contract_valid_until': 2021,
 'nation_position': '',
 'nation_jersey_number': None}
In [73]:
##reading data of a collection from MongoDb into a dataframe

cursor = Player_Details.find()
df_Player_Details =  pd.DataFrame(list(cursor))
df_Player_Details.head()
Out[73]:
_id Player_id short_name long_name player_url age dob height_cm weight_kg nationality ... real_face release_clause_eur player_tags team_position team_jersey_number loaned_from joined contract_valid_until nation_position nation_jersey_number
0 5fea2c144d648369bc937011 158023 L. Messi Lionel Andrés Messi Cuccittini https://sofifa.com/player/158023/lionel-messi/... 32 24-06-1987 170 72 Argentina ... Yes 195800000.0 #Dribbler, #Distance Shooter, #Crosser, #FK Sp... RW 10.0 01-07-2004 2021.0 NaN
1 5fea2c144d648369bc937012 20801 Cristiano Ronaldo Cristiano Ronaldo dos Santos Aveiro https://sofifa.com/player/20801/c-ronaldo-dos-... 34 05-02-1985 187 83 Portugal ... Yes 96500000.0 #Speedster, #Dribbler, #Distance Shooter, #Acr... LW 7.0 10-07-2018 2022.0 LS 7.0
2 5fea2c144d648369bc937013 190871 Neymar Jr Neymar da Silva Santos Junior https://sofifa.com/player/190871/neymar-da-sil... 27 05-02-1992 175 68 Brazil ... Yes 195200000.0 #Speedster, #Dribbler, #Playmaker  , #Crosser... CAM 10.0 03-08-2017 2022.0 LW 10.0
3 5fea2c144d648369bc937014 200389 J. Oblak Jan Oblak https://sofifa.com/player/200389/jan-oblak/20/... 26 07-01-1993 188 87 Slovenia ... Yes 164700000.0 GK 13.0 16-07-2014 2023.0 GK 1.0
4 5fea2c144d648369bc937015 183277 E. Hazard Eden Hazard https://sofifa.com/player/183277/eden-hazard/2... 28 07-01-1991 175 74 Belgium ... Yes 184500000.0 #Speedster, #Dribbler, #Acrobat LW 7.0 01-07-2019 2024.0 LF 10.0

5 rows × 32 columns

In [74]:
#Checking the shape of the dataframe
df_Player_Details.shape
Out[74]:
(18278, 32)
In [75]:
##Deleting _id column (created in MongoDb as a unique identification for documents of a collection) as it is not required
del df_Player_Details['_id']

df_Player_Details.shape
Out[75]:
(18278, 31)
In [76]:
##Checking for null values
pd.isnull(df_Player_Details).sum()
Out[76]:
Player_id                       0
short_name                      0
long_name                       0
player_url                      0
age                             0
dob                             0
height_cm                       0
weight_kg                       0
nationality                     0
club                            0
overall                         0
potential                       0
value_eur                       0
wage_eur                        0
player_positions                0
preferred_foot                  0
international_reputation        0
weak_foot                       0
skill_moves                     0
work_rate                       0
body_type                       0
real_face                       0
release_clause_eur           1298
player_tags                     0
team_position                   0
team_jersey_number            240
loaned_from                     0
joined                          0
contract_valid_until          240
nation_position                 0
nation_jersey_number        17152
dtype: int64
In [77]:
# removing column Nation Jersey Number as it has a large number of null values and is not so important for achieving the objective.

del df_Player_Details['nation_jersey_number']

df_Player_Details.shape
Out[77]:
(18278, 30)
In [78]:
#Similarly attributes like player_url, body_type, real_face, player_tags, loaned_from, joined, contract_valid_until can be deleted

df_Player_Details = df_Player_Details.drop(['player_url','body_type','real_face','player_tags','loaned_from', 'joined','team_jersey_number','contract_valid_until'], axis = 1)

df_Player_Details.shape
Out[78]:
(18278, 22)
In [79]:
#Again checking for null values
pd.isnull(df_Player_Details).sum()
Out[79]:
Player_id                      0
short_name                     0
long_name                      0
age                            0
dob                            0
height_cm                      0
weight_kg                      0
nationality                    0
club                           0
overall                        0
potential                      0
value_eur                      0
wage_eur                       0
player_positions               0
preferred_foot                 0
international_reputation       0
weak_foot                      0
skill_moves                    0
work_rate                      0
release_clause_eur          1298
team_position                  0
nation_position                0
dtype: int64
In [82]:
#Replacing null values in 'release_clause_eur' column with mean value of that column.
mean_value = df_Player_Details['release_clause_eur'].mean()

df_Player_Details=df_Player_Details.fillna(mean_value)
df_Player_Details.shape
Out[82]:
(18278, 22)
In [83]:
#All null records have been handled
pd.isnull(df_Player_Details).sum()
Out[83]:
Player_id                   0
short_name                  0
long_name                   0
age                         0
dob                         0
height_cm                   0
weight_kg                   0
nationality                 0
club                        0
overall                     0
potential                   0
value_eur                   0
wage_eur                    0
player_positions            0
preferred_foot              0
international_reputation    0
weak_foot                   0
skill_moves                 0
work_rate                   0
release_clause_eur          0
team_position               0
nation_position             0
dtype: int64
End of code by Pooja Rakate for Data pre-processing of Player Details data
Start of code by Mayuresh Londhe for Data pre-processing for Player Positions data
In [10]:
#Checking whether we are able to find records from mongo db 
for record in PPRANK.find({'Player_id':158023}):
    print(record)
{'_id': ObjectId('5fea2c164d648369bc93b777'), 'Player_id': 158023, 'short_name': 'L. Messi', 'ls': 89, 'st': 89, 'rs': 89, 'lw': 93, 'lf': 93, 'cf': 93, 'rf': 93, 'rw': 93, 'lam': 93, 'cam': 93, 'ram': 93, 'lm': 92, 'lcm': 87, 'cm': 87, 'rcm': 87, 'rm': 92, 'lwb': 68, 'ldm': 66, 'cdm': 66, 'rdm': 66, 'rwb': 68, 'lb': 63, 'lcb': 52, 'cb': 52, 'rcb': 52, 'rb': 63}
In [11]:
#Finding out Number of Goalkeepers 
# As a Goalkeeper plays only in one position dataset has Null values for all other postions for GoalKeeper.
#Due to which 2036 is the number of goalkeepers in dataset and in all Postion Columns there will 2036 Null values. 
#Which we will be replacing by 0

PPRANK.count_documents({'ls':None})  # counting null value in left Striker (LS) postion column
Out[11]:
2036
In [12]:
###reading data of a collection from MongoDb into a dataframe
cursor1 = PPRANK.find()
df_pos_rank =  pd.DataFrame(list(cursor1))
print(df_pos_rank)
                            _id  Player_id         short_name    ls    st  \
0      5fea2c164d648369bc93b777     158023           L. Messi  89.0  89.0   
1      5fea2c164d648369bc93b778      20801  Cristiano Ronaldo  91.0  91.0   
2      5fea2c164d648369bc93b779     190871          Neymar Jr  84.0  84.0   
3      5fea2c164d648369bc93b77a     200389           J. Oblak   NaN   NaN   
4      5fea2c164d648369bc93b77b     183277          E. Hazard  83.0  83.0   
...                         ...        ...                ...   ...   ...   
18273  5fea2c164d648369bc93fed8     245006         Shao Shuai  32.0  32.0   
18274  5fea2c164d648369bc93fed9     250995       Xiao Mingjie  33.0  33.0   
18275  5fea2c164d648369bc93feda     252332          Zhang Wei  43.0  43.0   
18276  5fea2c164d648369bc93fedb     251110       Wang Haijian  43.0  43.0   
18277  5fea2c164d648369bc93fedc     233449         Pan Ximing  42.0  42.0   

         rs    lw    lf    cf    rf  ...   lwb   ldm   cdm   rdm   rwb    lb  \
0      89.0  93.0  93.0  93.0  93.0  ...  68.0  66.0  66.0  66.0  68.0  63.0   
1      91.0  89.0  90.0  90.0  90.0  ...  65.0  61.0  61.0  61.0  65.0  61.0   
2      84.0  90.0  89.0  89.0  89.0  ...  66.0  61.0  61.0  61.0  66.0  61.0   
3       NaN   NaN   NaN   NaN   NaN  ...   NaN   NaN   NaN   NaN   NaN   NaN   
4      83.0  89.0  88.0  88.0  88.0  ...  66.0  63.0  63.0  63.0  66.0  61.0   
...     ...   ...   ...   ...   ...  ...   ...   ...   ...   ...   ...   ...   
18273  32.0  31.0  31.0  31.0  31.0  ...  43.0  42.0  42.0  42.0  43.0  45.0   
18274  33.0  33.0  32.0  32.0  32.0  ...  44.0  43.0  43.0  43.0  44.0  46.0   
18275  43.0  43.0  43.0  43.0  43.0  ...  47.0  49.0  49.0  49.0  47.0  47.0   
18276  43.0  45.0  44.0  44.0  44.0  ...  48.0  48.0  48.0  48.0  48.0  48.0   
18277  42.0  44.0  43.0  43.0  43.0  ...  48.0  49.0  49.0  49.0  48.0  48.0   

        lcb    cb   rcb    rb  
0      52.0  52.0  52.0  63.0  
1      53.0  53.0  53.0  61.0  
2      46.0  46.0  46.0  61.0  
3       NaN   NaN   NaN   NaN  
4      49.0  49.0  49.0  61.0  
...     ...   ...   ...   ...  
18273  46.0  46.0  46.0  45.0  
18274  47.0  47.0  47.0  46.0  
18275  49.0  49.0  49.0  47.0  
18276  49.0  49.0  49.0  48.0  
18277  50.0  50.0  50.0  48.0  

[18278 rows x 29 columns]
In [42]:
#Count of Null Values from All the columns

pd.isnull(df_pos_rank).sum()
Out[42]:
_id              0
Player_id        0
short_name       0
ls            2036
st            2036
rs            2036
lw            2036
lf            2036
cf            2036
rf            2036
rw            2036
lam           2036
cam           2036
ram           2036
lm            2036
lcm           2036
cm            2036
rcm           2036
rm            2036
lwb           2036
ldm           2036
cdm           2036
rdm           2036
rwb           2036
lb            2036
lcb           2036
cb            2036
rcb           2036
rb            2036
dtype: int64
In [13]:
# Removed Object Id

del df_pos_rank["_id"]
df_pos_rank.head()
Out[13]:
Player_id short_name ls st rs lw lf cf rf rw ... lwb ldm cdm rdm rwb lb lcb cb rcb rb
0 158023 L. Messi 89.0 89.0 89.0 93.0 93.0 93.0 93.0 93.0 ... 68.0 66.0 66.0 66.0 68.0 63.0 52.0 52.0 52.0 63.0
1 20801 Cristiano Ronaldo 91.0 91.0 91.0 89.0 90.0 90.0 90.0 89.0 ... 65.0 61.0 61.0 61.0 65.0 61.0 53.0 53.0 53.0 61.0
2 190871 Neymar Jr 84.0 84.0 84.0 90.0 89.0 89.0 89.0 90.0 ... 66.0 61.0 61.0 61.0 66.0 61.0 46.0 46.0 46.0 61.0
3 200389 J. Oblak NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 183277 E. Hazard 83.0 83.0 83.0 89.0 88.0 88.0 88.0 89.0 ... 66.0 63.0 63.0 63.0 66.0 61.0 49.0 49.0 49.0 61.0

5 rows × 28 columns

In [14]:
# # filling NA values: only goalkeeprs have missing data for these variables as these are ratings for positions other than goalkeeper.
#But we cannot delete rows having NA values as the goalkeepers have data for other variables. Hence, replacing them with 0.
df_pos_rank=df_pos_rank.fillna(0)
df_pos_rank.head()
Out[14]:
Player_id short_name ls st rs lw lf cf rf rw ... lwb ldm cdm rdm rwb lb lcb cb rcb rb
0 158023 L. Messi 89.0 89.0 89.0 93.0 93.0 93.0 93.0 93.0 ... 68.0 66.0 66.0 66.0 68.0 63.0 52.0 52.0 52.0 63.0
1 20801 Cristiano Ronaldo 91.0 91.0 91.0 89.0 90.0 90.0 90.0 89.0 ... 65.0 61.0 61.0 61.0 65.0 61.0 53.0 53.0 53.0 61.0
2 190871 Neymar Jr 84.0 84.0 84.0 90.0 89.0 89.0 89.0 90.0 ... 66.0 61.0 61.0 61.0 66.0 61.0 46.0 46.0 46.0 61.0
3 200389 J. Oblak 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 183277 E. Hazard 83.0 83.0 83.0 89.0 88.0 88.0 88.0 89.0 ... 66.0 63.0 63.0 63.0 66.0 61.0 49.0 49.0 49.0 61.0

5 rows × 28 columns

In [15]:
# checking whether fillna is successful.
#it is successful
print(df_pos_rank)
       Player_id         short_name    ls    st    rs    lw    lf    cf    rf  \
0         158023           L. Messi  89.0  89.0  89.0  93.0  93.0  93.0  93.0   
1          20801  Cristiano Ronaldo  91.0  91.0  91.0  89.0  90.0  90.0  90.0   
2         190871          Neymar Jr  84.0  84.0  84.0  90.0  89.0  89.0  89.0   
3         200389           J. Oblak   0.0   0.0   0.0   0.0   0.0   0.0   0.0   
4         183277          E. Hazard  83.0  83.0  83.0  89.0  88.0  88.0  88.0   
...          ...                ...   ...   ...   ...   ...   ...   ...   ...   
18273     245006         Shao Shuai  32.0  32.0  32.0  31.0  31.0  31.0  31.0   
18274     250995       Xiao Mingjie  33.0  33.0  33.0  33.0  32.0  32.0  32.0   
18275     252332          Zhang Wei  43.0  43.0  43.0  43.0  43.0  43.0  43.0   
18276     251110       Wang Haijian  43.0  43.0  43.0  45.0  44.0  44.0  44.0   
18277     233449         Pan Ximing  42.0  42.0  42.0  44.0  43.0  43.0  43.0   

         rw  ...   lwb   ldm   cdm   rdm   rwb    lb   lcb    cb   rcb    rb  
0      93.0  ...  68.0  66.0  66.0  66.0  68.0  63.0  52.0  52.0  52.0  63.0  
1      89.0  ...  65.0  61.0  61.0  61.0  65.0  61.0  53.0  53.0  53.0  61.0  
2      90.0  ...  66.0  61.0  61.0  61.0  66.0  61.0  46.0  46.0  46.0  61.0  
3       0.0  ...   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
4      89.0  ...  66.0  63.0  63.0  63.0  66.0  61.0  49.0  49.0  49.0  61.0  
...     ...  ...   ...   ...   ...   ...   ...   ...   ...   ...   ...   ...  
18273  31.0  ...  43.0  42.0  42.0  42.0  43.0  45.0  46.0  46.0  46.0  45.0  
18274  33.0  ...  44.0  43.0  43.0  43.0  44.0  46.0  47.0  47.0  47.0  46.0  
18275  43.0  ...  47.0  49.0  49.0  49.0  47.0  47.0  49.0  49.0  49.0  47.0  
18276  45.0  ...  48.0  48.0  48.0  48.0  48.0  48.0  49.0  49.0  49.0  48.0  
18277  44.0  ...  48.0  49.0  49.0  49.0  48.0  48.0  50.0  50.0  50.0  48.0  

[18278 rows x 28 columns]
End of code by Mayuresh Londhe for Data pre-processing of Player positions data
Start of code by Ankit Rungta for Data pre-processing of AM attributes data
In [16]:
####Checking whether we are able to find records from mongo db 

pprint.pprint(AM_Attributes.find({"item": {"Player_id":20801}}))
<pymongo.cursor.Cursor object at 0x000001B1D366D370>
In [21]:
##reading data of a collection from MongoDb into a dataframe
cursor2 = AM_Attributes.find()
df_AM_Attr =  pd.DataFrame(list(cursor2))
print(df_AM_Attr)
                            _id  Player_id         short_name  pace  shooting  \
0      5fea2c184d648369bc93fedd     158023           L. Messi  87.0      92.0   
1      5fea2c184d648369bc93fede      20801  Cristiano Ronaldo  90.0      93.0   
2      5fea2c184d648369bc93fedf     190871          Neymar Jr  91.0      85.0   
3      5fea2c184d648369bc93fee0     200389           J. Oblak   NaN       NaN   
4      5fea2c184d648369bc93fee1     183277          E. Hazard  91.0      83.0   
...                         ...        ...                ...   ...       ...   
18273  5fea2c184d648369bc94463e     245006         Shao Shuai  57.0      23.0   
18274  5fea2c184d648369bc94463f     250995       Xiao Mingjie  58.0      24.0   
18275  5fea2c184d648369bc944640     252332          Zhang Wei  54.0      35.0   
18276  5fea2c184d648369bc944641     251110       Wang Haijian  59.0      35.0   
18277  5fea2c184d648369bc944642     233449         Pan Ximing  60.0      32.0   

       passing  dribbling  attacking_crossing  attacking_finishing  \
0         92.0       96.0                  88                   95   
1         82.0       89.0                  84                   94   
2         87.0       95.0                  87                   87   
3          NaN        NaN                  13                   11   
4         86.0       94.0                  81                   84   
...        ...        ...                 ...                  ...   
18273     28.0       33.0                  21                   17   
18274     33.0       35.0                  24                   20   
18275     44.0       45.0                  32                   33   
18276     47.0       47.0                  39                   34   
18277     51.0       45.0                  44                   30   

       attacking_heading_accuracy  ...  movement_acceleration  \
0                              70  ...                     91   
1                              89  ...                     89   
2                              62  ...                     94   
3                              15  ...                     43   
4                              61  ...                     94   
...                           ...  ...                    ...   
18273                          40  ...                     56   
18274                          42  ...                     55   
18275                          49  ...                     55   
18276                          47  ...                     55   
18277                          48  ...                     54   

       movement_sprint_speed  movement_agility  movement_reactions  \
0                         84                93                  95   
1                         91                87                  96   
2                         89                96                  92   
3                         60                67                  88   
4                         88                95                  90   
...                      ...               ...                 ...   
18273                     58                45                  40   
18274                     61                43                  41   
18275                     54                52                  52   
18276                     63                55                  54   
18277                     64                52                  49   

       movement_balance  power_long_shots  mentality_positioning  vision  \
0                    95                94                     94      94   
1                    71                93                     95      82   
2                    84                84                     87      90   
3                    49                12                     11      65   
4                    94                80                     87      89   
...                 ...               ...                    ...     ...   
18273                73                16                     28      25   
18274                76                20                     23      25   
18275                57                26                     38      38   
18276                59                28                     31      48   
18277                71                30                     29      52   

       penalties  composure  
0             75         96  
1             85         95  
2             90         94  
3             11         68  
4             88         91  
...          ...        ...  
18273         39         41  
18274         37         35  
18275         36         39  
18276         36         40  
18277         38         42  

[18278 rows x 27 columns]
In [22]:
####Removing unnecessary column id that was added while loading data from MongoDb into dataframe
df_AM_Attr = df_AM_Attr.drop(columns="_id")
df_AM_Attr.head()
Out[22]:
Player_id short_name pace shooting passing dribbling attacking_crossing attacking_finishing attacking_heading_accuracy attacking_short_passing ... movement_acceleration movement_sprint_speed movement_agility movement_reactions movement_balance power_long_shots mentality_positioning vision penalties composure
0 158023 L. Messi 87.0 92.0 92.0 96.0 88 95 70 92 ... 91 84 93 95 95 94 94 94 75 96
1 20801 Cristiano Ronaldo 90.0 93.0 82.0 89.0 84 94 89 83 ... 89 91 87 96 71 93 95 82 85 95
2 190871 Neymar Jr 91.0 85.0 87.0 95.0 87 87 62 87 ... 94 89 96 92 84 84 87 90 90 94
3 200389 J. Oblak NaN NaN NaN NaN 13 11 15 43 ... 43 60 67 88 49 12 11 65 11 68
4 183277 E. Hazard 91.0 83.0 86.0 94.0 81 84 61 89 ... 94 88 95 90 94 80 87 89 88 91

5 rows × 26 columns

In [23]:
#Checking for null Values 
pd.isnull(df_AM_Attr)
Out[23]:
Player_id short_name pace shooting passing dribbling attacking_crossing attacking_finishing attacking_heading_accuracy attacking_short_passing ... movement_acceleration movement_sprint_speed movement_agility movement_reactions movement_balance power_long_shots mentality_positioning vision penalties composure
0 False False False False False False False False False False ... False False False False False False False False False False
1 False False False False False False False False False False ... False False False False False False False False False False
2 False False False False False False False False False False ... False False False False False False False False False False
3 False False True True True True False False False False ... False False False False False False False False False False
4 False False False False False False False False False False ... False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
18273 False False False False False False False False False False ... False False False False False False False False False False
18274 False False False False False False False False False False ... False False False False False False False False False False
18275 False False False False False False False False False False ... False False False False False False False False False False
18276 False False False False False False False False False False ... False False False False False False False False False False
18277 False False False False False False False False False False ... False False False False False False False False False False

18278 rows × 26 columns

In [24]:
pd.isnull(df_AM_Attr).sum()
Out[24]:
Player_id                        0
short_name                       0
pace                          2036
shooting                      2036
passing                       2036
dribbling                     2036
attacking_crossing               0
attacking_finishing              0
attacking_heading_accuracy       0
attacking_short_passing          0
attacking_volleys                0
skill_dribbling                  0
skill_curve                      0
skill_fk_accuracy                0
skill_long_passing               0
skill_ball_control               0
movement_acceleration            0
movement_sprint_speed            0
movement_agility                 0
movement_reactions               0
movement_balance                 0
power_long_shots                 0
mentality_positioning            0
vision                           0
penalties                        0
composure                        0
dtype: int64
In [25]:
#Having null values in Pace,Shooting, and dribbling 
##Replacing these NA values with 0 as the data is for attackers and mid-fielders and the null values are for Goal-keepers and Defender

df_AM_Attr=df_AM_Attr.fillna(0)
df_AM_Attr.head()
Out[25]:
Player_id short_name pace shooting passing dribbling attacking_crossing attacking_finishing attacking_heading_accuracy attacking_short_passing ... movement_acceleration movement_sprint_speed movement_agility movement_reactions movement_balance power_long_shots mentality_positioning vision penalties composure
0 158023 L. Messi 87.0 92.0 92.0 96.0 88 95 70 92 ... 91 84 93 95 95 94 94 94 75 96
1 20801 Cristiano Ronaldo 90.0 93.0 82.0 89.0 84 94 89 83 ... 89 91 87 96 71 93 95 82 85 95
2 190871 Neymar Jr 91.0 85.0 87.0 95.0 87 87 62 87 ... 94 89 96 92 84 84 87 90 90 94
3 200389 J. Oblak 0.0 0.0 0.0 0.0 13 11 15 43 ... 43 60 67 88 49 12 11 65 11 68
4 183277 E. Hazard 91.0 83.0 86.0 94.0 81 84 61 89 ... 94 88 95 90 94 80 87 89 88 91

5 rows × 26 columns

In [26]:
####Checking for null values

pd.isnull(df_AM_Attr).sum()
Out[26]:
Player_id                     0
short_name                    0
pace                          0
shooting                      0
passing                       0
dribbling                     0
attacking_crossing            0
attacking_finishing           0
attacking_heading_accuracy    0
attacking_short_passing       0
attacking_volleys             0
skill_dribbling               0
skill_curve                   0
skill_fk_accuracy             0
skill_long_passing            0
skill_ball_control            0
movement_acceleration         0
movement_sprint_speed         0
movement_agility              0
movement_reactions            0
movement_balance              0
power_long_shots              0
mentality_positioning         0
vision                        0
penalties                     0
composure                     0
dtype: int64
End of code by Ankit Rungta for Data pre-processing of AM attributes data
Start of code by Priyanka Chimthankar for Data pre-processing of DG attributes data
In [27]:
###Checking whether we are able to find records from mongo db 
cursor3 = DG_Attributes.find()
df_DG_Attr =  pd.DataFrame(list(cursor3))
print(df_DG_Attr)
                            _id  Player_id         short_name  stamina  \
0      5fea2c1a4d648369bc944643     158023           L. Messi       75   
1      5fea2c1a4d648369bc944644      20801  Cristiano Ronaldo       85   
2      5fea2c1a4d648369bc944645     190871          Neymar Jr       81   
3      5fea2c1a4d648369bc944646     200389           J. Oblak       41   
4      5fea2c1a4d648369bc944647     183277          E. Hazard       84   
...                         ...        ...                ...      ...   
18273  5fea2c1a4d648369bc948da4     245006         Shao Shuai       60   
18274  5fea2c1a4d648369bc948da5     250995       Xiao Mingjie       55   
18275  5fea2c1a4d648369bc948da6     252332          Zhang Wei       50   
18276  5fea2c1a4d648369bc948da7     251110       Wang Haijian       42   
18277  5fea2c1a4d648369bc948da8     233449         Pan Ximing       40   

       strength  defending  physic  defending_marking  \
0            68       39.0    66.0                 33   
1            78       35.0    78.0                 28   
2            49       32.0    58.0                 27   
3            78        NaN     NaN                 27   
4            63       35.0    66.0                 34   
...         ...        ...     ...                ...   
18273        47       47.0    51.0                 45   
18274        44       48.0    48.0                 42   
18275        51       48.0    51.0                 46   
18276        55       45.0    52.0                 39   
18277        61       47.0    55.0                 52   

       defending_standing_tackle  defending_sliding_tackle  aggression  \
0                             37                        26          48   
1                             32                        24          63   
2                             26                        29          51   
3                             12                        18          34   
4                             27                        22          54   
...                          ...                       ...         ...   
18273                         50                        52          46   
18274                         53                        57          42   
18275                         52                        46          50   
18276                         44                        54          57   
18277                         42                        48          54   

       interceptions  goalkeeping_diving  goalkeeping_handling  \
0                 40                   6                    11   
1                 29                   7                    11   
2                 36                   9                     9   
3                 19                  87                    92   
4                 41                  11                    12   
...              ...                 ...                   ...   
18273             48                   5                     5   
18274             49                  13                     6   
18275             45                   7                     8   
18276             49                  14                     9   
18277             47                   8                    12   

       goalkeeping_kicking  goalkeeping_reflexes  gk_speed  \
0                       15                     8       NaN   
1                       15                    11       NaN   
2                       15                    11       NaN   
3                       78                    89      52.0   
4                        6                     8       NaN   
...                    ...                   ...       ...   
18273                   13                    14       NaN   
18274                   14                     9       NaN   
18275                   10                    14       NaN   
18276                   13                    13       NaN   
18277                   14                    10       NaN   

       goalkeeping_positioning  shot_power  power_jumping  
0                           14          86             68  
1                           14          95             95  
2                           15          80             61  
3                           90          59             78  
4                            8          82             56  
...                        ...         ...            ...  
18273                        5          36             70  
18274                       11          33             72  
18275                        6          48             60  
18276                       13          46             61  
18277                        9          41             64  

[18278 rows x 20 columns]
In [28]:
####Removing id column as it is not needed
df_DG_Attr=df_DG_Attr.drop(columns= "_id")
df_DG_Attr
Out[28]:
Player_id short_name stamina strength defending physic defending_marking defending_standing_tackle defending_sliding_tackle aggression interceptions goalkeeping_diving goalkeeping_handling goalkeeping_kicking goalkeeping_reflexes gk_speed goalkeeping_positioning shot_power power_jumping
0 158023 L. Messi 75 68 39.0 66.0 33 37 26 48 40 6 11 15 8 NaN 14 86 68
1 20801 Cristiano Ronaldo 85 78 35.0 78.0 28 32 24 63 29 7 11 15 11 NaN 14 95 95
2 190871 Neymar Jr 81 49 32.0 58.0 27 26 29 51 36 9 9 15 11 NaN 15 80 61
3 200389 J. Oblak 41 78 NaN NaN 27 12 18 34 19 87 92 78 89 52.0 90 59 78
4 183277 E. Hazard 84 63 35.0 66.0 34 27 22 54 41 11 12 6 8 NaN 8 82 56
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
18273 245006 Shao Shuai 60 47 47.0 51.0 45 50 52 46 48 5 5 13 14 NaN 5 36 70
18274 250995 Xiao Mingjie 55 44 48.0 48.0 42 53 57 42 49 13 6 14 9 NaN 11 33 72
18275 252332 Zhang Wei 50 51 48.0 51.0 46 52 46 50 45 7 8 10 14 NaN 6 48 60
18276 251110 Wang Haijian 42 55 45.0 52.0 39 44 54 57 49 14 9 13 13 NaN 13 46 61
18277 233449 Pan Ximing 40 61 47.0 55.0 52 42 48 54 47 8 12 14 10 NaN 9 41 64

18278 rows × 19 columns

In [29]:
####Checking for null values in the collection
pd.isnull(df_DG_Attr)
Out[29]:
Player_id short_name stamina strength defending physic defending_marking defending_standing_tackle defending_sliding_tackle aggression interceptions goalkeeping_diving goalkeeping_handling goalkeeping_kicking goalkeeping_reflexes gk_speed goalkeeping_positioning shot_power power_jumping
0 False False False False False False False False False False False False False False False True False False False
1 False False False False False False False False False False False False False False False True False False False
2 False False False False False False False False False False False False False False False True False False False
3 False False False False True True False False False False False False False False False False False False False
4 False False False False False False False False False False False False False False False True False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
18273 False False False False False False False False False False False False False False False True False False False
18274 False False False False False False False False False False False False False False False True False False False
18275 False False False False False False False False False False False False False False False True False False False
18276 False False False False False False False False False False False False False False False True False False False
18277 False False False False False False False False False False False False False False False True False False False

18278 rows × 19 columns

In [30]:
####Count of null values in all columns
pd.isnull(df_DG_Attr).sum()
Out[30]:
Player_id                        0
short_name                       0
stamina                          0
strength                         0
defending                     2036
physic                        2036
defending_marking                0
defending_standing_tackle        0
defending_sliding_tackle         0
aggression                       0
interceptions                    0
goalkeeping_diving               0
goalkeeping_handling             0
goalkeeping_kicking              0
goalkeeping_reflexes             0
gk_speed                     16242
goalkeeping_positioning          0
shot_power                       0
power_jumping                    0
dtype: int64
In [31]:
#### Replacing NA values with 0 as the data consists of all the Player's data and have null values where the player is not eligible for that attribute
## gk_speed is null for the players except for goalkeepers
##defending and physic is null for the goalkeepers,hence replacing with 0
df_DG_Attr=df_DG_Attr.fillna(0)
df_DG_Attr
Out[31]:
Player_id short_name stamina strength defending physic defending_marking defending_standing_tackle defending_sliding_tackle aggression interceptions goalkeeping_diving goalkeeping_handling goalkeeping_kicking goalkeeping_reflexes gk_speed goalkeeping_positioning shot_power power_jumping
0 158023 L. Messi 75 68 39.0 66.0 33 37 26 48 40 6 11 15 8 0.0 14 86 68
1 20801 Cristiano Ronaldo 85 78 35.0 78.0 28 32 24 63 29 7 11 15 11 0.0 14 95 95
2 190871 Neymar Jr 81 49 32.0 58.0 27 26 29 51 36 9 9 15 11 0.0 15 80 61
3 200389 J. Oblak 41 78 0.0 0.0 27 12 18 34 19 87 92 78 89 52.0 90 59 78
4 183277 E. Hazard 84 63 35.0 66.0 34 27 22 54 41 11 12 6 8 0.0 8 82 56
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
18273 245006 Shao Shuai 60 47 47.0 51.0 45 50 52 46 48 5 5 13 14 0.0 5 36 70
18274 250995 Xiao Mingjie 55 44 48.0 48.0 42 53 57 42 49 13 6 14 9 0.0 11 33 72
18275 252332 Zhang Wei 50 51 48.0 51.0 46 52 46 50 45 7 8 10 14 0.0 6 48 60
18276 251110 Wang Haijian 42 55 45.0 52.0 39 44 54 57 49 14 9 13 13 0.0 13 46 61
18277 233449 Pan Ximing 40 61 47.0 55.0 52 42 48 54 47 8 12 14 10 0.0 9 41 64

18278 rows × 19 columns

In [32]:
pd.isnull(df_DG_Attr).sum()
Out[32]:
Player_id                    0
short_name                   0
stamina                      0
strength                     0
defending                    0
physic                       0
defending_marking            0
defending_standing_tackle    0
defending_sliding_tackle     0
aggression                   0
interceptions                0
goalkeeping_diving           0
goalkeeping_handling         0
goalkeeping_kicking          0
goalkeeping_reflexes         0
gk_speed                     0
goalkeeping_positioning      0
shot_power                   0
power_jumping                0
dtype: int64
End of code by Priyanka Chimthankar for Data pre-processing of DG attributes data

Part 2 - Reading and Writing Processed Data to PostgreSql

In [ ]:
#Connect to PostgreSQL and create a database
In [102]:
try:
    #Creating a connection variable to connect to PostgreSQL
    connection = pg.connect(database="postgres", user='postgres', password='password', host='127.0.0.1', port= '5432')
    
    connection.autocommit = True
    
    cursor = connection.cursor()
    
    ##Creating Database
    cursor.execute("create database PES_Players_2020")
    
    cursor.execute("SELECT version();")
    # Fetch result
    record = cursor.fetchone()
    print("You are connected to - ", record, "\n")
    print("Database created successfully !!")
    
except:
    print("Error while connecting to PostgreSQL !!")
You are connected to -  ('PostgreSQL 13.1, compiled by Visual C++ build 1914, 64-bit',) 

Database created successfully !!
Start of code by Pooja Rakate for Writing and reading Player Details data to and from PostgreSQL
In [2]:
##Extablishing SQLAlchemy engine connection to create a table in the database

engine = sqla.create_engine('postgresql://postgres:password@localhost:5432/pes_players_2020')
con_engine = engine.connect()
In [148]:
##Creating a table Player_Details in database 'pes_players_2020' created in above step
#If table already exists the it will raise an error. we can change this to append if table already exists 
#by changing the value to if_exists argument

df_Player_Details.to_sql('Player_Details', con_engine, if_exists = 'fail', index = False)

#Check if the table is created
print(engine.table_names())
['Player_Details']
In [3]:
##Fetching top 10 rows from Player_Details table from postgreSql and displaying it here

result = engine.execute('select * from "Player_Details" LIMIT 10').fetchall()
num_records = 0

for r in result:
    num_records= num_records+1
    print(r)
    
print(num_records)
(158023, 'L. Messi', 'Lionel Andrés Messi Cuccittini', 32, '24-06-1987', 170, 72, 'Argentina', 'FC Barcelona', 94, 94, 95500000, 565000, 'RW, CF, ST', 'Left', 5, 4, 4, 'Medium/Low', 195800000.0, 'RW', 10.0, '')
(20801, 'Cristiano Ronaldo', 'Cristiano Ronaldo dos Santos Aveiro', 34, '05-02-1985', 187, 83, 'Portugal', 'Juventus', 93, 93, 58500000, 405000, 'ST, LW', 'Right', 5, 4, 5, 'High/Low', 96500000.0, 'LW', 7.0, 'LS')
(190871, 'Neymar Jr', 'Neymar da Silva Santos Junior', 27, '05-02-1992', 175, 68, 'Brazil', 'Paris Saint-Germain', 92, 92, 105500000, 290000, 'LW, CAM', 'Right', 5, 5, 5, 'High/Medium', 195200000.0, 'CAM', 10.0, 'LW')
(200389, 'J. Oblak', 'Jan Oblak', 26, '07-01-1993', 188, 87, 'Slovenia', 'Atlético Madrid', 91, 93, 77500000, 125000, 'GK', 'Right', 3, 3, 1, 'Medium/Medium', 164700000.0, 'GK', 13.0, 'GK')
(183277, 'E. Hazard', 'Eden Hazard', 28, '07-01-1991', 175, 74, 'Belgium', 'Real Madrid', 91, 91, 90000000, 470000, 'LW, CF', 'Right', 4, 4, 4, 'High/Medium', 184500000.0, 'LW', 7.0, 'LF')
(192985, 'K. De Bruyne', 'Kevin De Bruyne', 28, '28-06-1991', 181, 70, 'Belgium', 'Manchester City', 91, 91, 90000000, 370000, 'CAM, CM', 'Right', 4, 5, 4, 'High/High', 166500000.0, 'RCM', 17.0, 'RCM')
(192448, 'M. ter Stegen', 'Marc-André ter Stegen', 27, '30-04-1992', 187, 85, 'Germany', 'FC Barcelona', 90, 93, 67500000, 250000, 'GK', 'Right', 3, 4, 1, 'Medium/Medium', 143400000.0, 'GK', 1.0, 'SUB')
(203376, 'V. van Dijk', 'Virgil van Dijk', 27, '08-07-1991', 193, 92, 'Netherlands', 'Liverpool', 90, 91, 78000000, 200000, 'CB', 'Right', 3, 3, 2, 'Medium/Medium', 150200000.0, 'LCB', 4.0, 'LCB')
(177003, 'L. Modrić', 'Luka Modrić', 33, '09-09-1985', 172, 66, 'Croatia', 'Real Madrid', 90, 90, 45000000, 340000, 'CM', 'Right', 4, 4, 4, 'High/High', 92300000.0, 'RCM', 10.0, '')
(209331, 'M. Salah', 'Mohamed  Salah Ghaly', 27, '15-06-1992', 175, 71, 'Egypt', 'Liverpool', 90, 90, 80500000, 240000, 'RW, ST', 'Left', 3, 3, 4, 'High/Medium', 148900000.0, 'RW', 11.0, 'RW')
10
In [62]:
##Storing Tables from PostgreSQL into dataframe in Pandas for further visualisation purposes

df_Player_Det_PG = pd.read_sql_table("Player_Details", con_engine);

df_Player_Det_PG.shape
Out[62]:
(18278, 23)
End of code by Pooja Rakate for Writing and reading Player Details data to and from PostgreSQL
Start of code by Mayuresh Londhe for Writing and reading Player Positions data to and from PostgreSQL
In [155]:
#Creating Player_Position table

df_pos_rank.to_sql('Player_Position', con_engine, if_exists = 'fail', index = False)

#Check if the table is created
print(engine.table_names())
['Player_Details', 'Player_Position']
In [4]:
##Fetching top 10 rows from Player_Position table from postgreSql and displaying it here

result1 = engine.execute('select * from "Player_Position" LIMIT 10').fetchall()
num_records1 = 0

for r in result1:
    num_records1 = num_records1 + 1
    print(r)
    
print(num_records1)
(158023, 89.0, 89.0, 89.0, 93.0, 93.0, 93.0, 93.0, 93.0, 93.0, 93.0, 93.0, 92.0, 87.0, 87.0, 87.0, 92.0, 68.0, 66.0, 66.0, 66.0, 68.0, 63.0, 52.0, 52.0, 52.0, 63.0)
(20801, 91.0, 91.0, 91.0, 89.0, 90.0, 90.0, 90.0, 89.0, 88.0, 88.0, 88.0, 88.0, 81.0, 81.0, 81.0, 88.0, 65.0, 61.0, 61.0, 61.0, 65.0, 61.0, 53.0, 53.0, 53.0, 61.0)
(190871, 84.0, 84.0, 84.0, 90.0, 89.0, 89.0, 89.0, 90.0, 90.0, 90.0, 90.0, 89.0, 82.0, 82.0, 82.0, 89.0, 66.0, 61.0, 61.0, 61.0, 66.0, 61.0, 46.0, 46.0, 46.0, 61.0)
(200389, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
(183277, 83.0, 83.0, 83.0, 89.0, 88.0, 88.0, 88.0, 89.0, 89.0, 89.0, 89.0, 89.0, 83.0, 83.0, 83.0, 89.0, 66.0, 63.0, 63.0, 63.0, 66.0, 61.0, 49.0, 49.0, 49.0, 61.0)
(192985, 82.0, 82.0, 82.0, 87.0, 87.0, 87.0, 87.0, 87.0, 88.0, 88.0, 88.0, 88.0, 87.0, 87.0, 87.0, 88.0, 77.0, 77.0, 77.0, 77.0, 77.0, 73.0, 66.0, 66.0, 66.0, 73.0)
(192448, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
(203376, 69.0, 69.0, 69.0, 67.0, 69.0, 69.0, 69.0, 67.0, 69.0, 69.0, 69.0, 69.0, 74.0, 74.0, 74.0, 69.0, 79.0, 83.0, 83.0, 83.0, 79.0, 81.0, 87.0, 87.0, 87.0, 81.0)
(177003, 77.0, 77.0, 77.0, 84.0, 83.0, 83.0, 83.0, 84.0, 86.0, 86.0, 86.0, 85.0, 87.0, 87.0, 87.0, 85.0, 81.0, 81.0, 81.0, 81.0, 81.0, 79.0, 72.0, 72.0, 72.0, 79.0)
(209331, 84.0, 84.0, 84.0, 88.0, 88.0, 88.0, 88.0, 88.0, 87.0, 87.0, 87.0, 87.0, 81.0, 81.0, 81.0, 87.0, 70.0, 67.0, 67.0, 67.0, 70.0, 66.0, 57.0, 57.0, 57.0, 66.0)
10
In [63]:
##Storing Tables from PostgreSQL into dataframe in Pandas for visualisation purposes

df_Player_Pos_PG = pd.read_sql_table("Player_Position", con_engine);

df_Player_Pos_PG.shape
Out[63]:
(18278, 27)
End of code by Mayuresh Londhe for Writing and reading Player Positions data to and from PostgreSQL
Start of code by Ankit Rungta for Writing and reading AM Player Attributes data to and from PostgreSQL
In [156]:
#Creating AM_Player_Attributes table

df_AM_Attr.to_sql('AM_Player_Attributes', con_engine, if_exists = 'fail', index = False)

#Check if the table is created
print(engine.table_names())
['Player_Details', 'Player_Position', 'AM_Player_Attributes']
In [5]:
##Fetching top 10 rows from AM_Player_Attributes table from postgreSql and displaying it here

result2 = engine.execute('select * from "AM_Player_Attributes" LIMIT 10').fetchall()
num_records2 = 0

for r in result2:
    num_records2 = num_records2 + 1
    print(r)
    
print(num_records2)
(158023, 87.0, 92.0, 92.0, 96.0, 88, 95, 70, 92, 88, 97, 93, 94, 92, 96, 91, 84, 93, 95, 95, 94, 94, 94, 75, 96)
(20801, 90.0, 93.0, 82.0, 89.0, 84, 94, 89, 83, 87, 89, 81, 76, 77, 92, 89, 91, 87, 96, 71, 93, 95, 82, 85, 95)
(190871, 91.0, 85.0, 87.0, 95.0, 87, 87, 62, 87, 87, 96, 88, 87, 81, 95, 94, 89, 96, 92, 84, 84, 87, 90, 90, 94)
(200389, 0.0, 0.0, 0.0, 0.0, 13, 11, 15, 43, 13, 12, 13, 14, 40, 30, 43, 60, 67, 88, 49, 12, 11, 65, 11, 68)
(183277, 91.0, 83.0, 86.0, 94.0, 81, 84, 61, 89, 83, 95, 83, 79, 83, 94, 94, 88, 95, 90, 94, 80, 87, 89, 88, 91)
(192985, 76.0, 86.0, 92.0, 86.0, 93, 82, 55, 92, 82, 86, 85, 83, 91, 91, 77, 76, 78, 91, 76, 90, 88, 94, 79, 91)
(192448, 0.0, 0.0, 0.0, 0.0, 18, 14, 11, 61, 14, 21, 18, 12, 63, 30, 38, 50, 37, 86, 43, 10, 11, 70, 25, 70)
(203376, 77.0, 60.0, 70.0, 71.0, 53, 52, 86, 78, 45, 70, 60, 70, 81, 76, 74, 79, 61, 88, 53, 64, 47, 65, 62, 89)
(177003, 74.0, 76.0, 89.0, 89.0, 86, 72, 55, 92, 76, 87, 85, 78, 88, 92, 77, 71, 92, 89, 93, 82, 79, 91, 82, 92)
(209331, 93.0, 86.0, 81.0, 89.0, 79, 90, 59, 84, 79, 89, 83, 69, 75, 89, 94, 92, 91, 92, 88, 84, 92, 84, 77, 91)
10
In [61]:
##Storing Tables from PostgreSQL into dataframe in Pandas for visualisation purposes

df_AM_Attr_PG = pd.read_sql_table("AM_Player_Attributes", con_engine);

df_AM_Attr_PG.shape
Out[61]:
(18278, 25)
End of code by Ankit Rungta for Writing and reading AM Player Attributes data to and from PostgreSQL
Start of code by Priyanka Chimthankar for Writing and reading DG Player Attributes data to and from PostgreSQL
In [157]:
#Creating DG_Player_Attributes table

df_DG_Attr.to_sql('DG_Player_Attributes', con_engine, if_exists = 'fail', index = False)

#Check if the table is created
print(engine.table_names())
['Player_Details', 'Player_Position', 'AM_Player_Attributes', 'DG_Player_Attributes']
In [6]:
##Fetching top 10 rows from DG_Player_Attributes table from postgreSql and displaying it here


result3 = engine.execute('select * from "DG_Player_Attributes" LIMIT 10').fetchall()
num_records3 = 0

for r in result3:
    num_records3 = num_records3 + 1
    print(r)
    
print(num_records3)
(158023, 75, 68, 39.0, 66.0, 33, 37, 26, 48, 40, 6, 11, 15, 8, 0.0, 14, 86, 68)
(20801, 85, 78, 35.0, 78.0, 28, 32, 24, 63, 29, 7, 11, 15, 11, 0.0, 14, 95, 95)
(190871, 81, 49, 32.0, 58.0, 27, 26, 29, 51, 36, 9, 9, 15, 11, 0.0, 15, 80, 61)
(200389, 41, 78, 0.0, 0.0, 27, 12, 18, 34, 19, 87, 92, 78, 89, 52.0, 90, 59, 78)
(183277, 84, 63, 35.0, 66.0, 34, 27, 22, 54, 41, 11, 12, 6, 8, 0.0, 8, 82, 56)
(192985, 89, 74, 61.0, 78.0, 68, 58, 51, 76, 61, 15, 13, 5, 13, 0.0, 10, 91, 63)
(192448, 35, 78, 0.0, 0.0, 25, 13, 10, 43, 22, 88, 85, 88, 90, 45.0, 88, 66, 79)
(203376, 75, 92, 90.0, 86.0, 91, 92, 85, 82, 89, 13, 10, 13, 11, 0.0, 11, 81, 90)
(177003, 85, 58, 72.0, 66.0, 68, 76, 71, 62, 82, 13, 9, 7, 9, 0.0, 14, 79, 68)
(209331, 85, 73, 45.0, 74.0, 38, 43, 41, 63, 55, 14, 14, 9, 14, 0.0, 11, 80, 69)
10
In [64]:
##Storing Tables from PostgreSQL into dataframe in Pandas for visualisation purposes

df_DG_Attr_PG = pd.read_sql_table("DG_Player_Attributes", con_engine);

df_DG_Attr_PG.shape
Out[64]:
(18278, 18)
End of code by Priyanka Chimthankar for Writing and reading DG Player Attributes data to and from PostgreSQL

Part 3 - Creating a single Dataframe of all the four tables

In [48]:
###concatenating all the dataframes df_Player_Det_PG, df_Player_Pos_PG, df_AM_Attr_PG and df_DG_Attr_PG 
#in one DataFrame by joining on the Player_id column to use it for visualisation

df_list=[df_Player_Det_PG, df_Player_Pos_PG, df_AM_Attr_PG, df_DG_Attr_PG]

df_PES_Visualisation = rd(lambda left,right: pd.merge(left,right,on='Player_id'), df_list)
In [135]:
df_PES_Visualisation.head()
Out[135]:
Unnamed: 0 Player_id short_name long_name age dob height_cm weight_kg nationality club ... aggression interceptions goalkeeping_diving goalkeeping_handling goalkeeping_kicking goalkeeping_reflexes gk_speed goalkeeping_positioning shot_power power_jumping
0 0 158023 L. Messi Lionel Andrés Messi Cuccittini 32 24-06-1987 170 72 Argentina FC Barcelona ... 48 40 6 11 15 8 0.0 14 86 68
1 1 20801 Cristiano Ronaldo Cristiano Ronaldo dos Santos Aveiro 34 05-02-1985 187 83 Portugal Juventus ... 63 29 7 11 15 11 0.0 14 95 95
2 2 190871 Neymar Jr Neymar da Silva Santos Junior 27 05-02-1992 175 68 Brazil Paris Saint-Germain ... 51 36 9 9 15 11 0.0 15 80 61
3 3 200389 J. Oblak Jan Oblak 26 07-01-1993 188 87 Slovenia Atlético Madrid ... 34 19 87 92 78 89 52.0 90 59 78
4 4 183277 E. Hazard Eden Hazard 28 07-01-1991 175 74 Belgium Real Madrid ... 54 41 11 12 6 8 0.0 8 82 56

5 rows × 91 columns

In [136]:
df_PES_Visualisation.shape
Out[136]:
(18278, 91)
In [175]:
##Converting the final data frame into csv file to share it with team members so that each member 
#can work on visualisations of their respective parts.

df_PES_Visualisation.to_csv('D:/Pooja_NCI/1_Semester/DataBase and Analytical programming/Project/Code from everyone/PES_Final_for_Visualisation.csv')

Part 4 - Visualisation

In [8]:
# Summary function
df_PES_Visualisation.describe()
Out[8]:
Unnamed: 0 Player_id age height_cm weight_kg overall potential value_eur wage_eur international_reputation ... aggression interceptions goalkeeping_diving goalkeeping_handling goalkeeping_kicking goalkeeping_reflexes gk_speed goalkeeping_positioning shot_power power_jumping
count 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000 1.827800e+04 18278.000000 18278.000000 ... 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000 18278.000000
mean 9138.500000 219738.864482 25.283291 181.362184 75.276343 66.244994 71.546887 2.484038e+06 9456.942773 1.103184 ... 55.742149 46.380239 16.572765 16.354853 16.212934 16.709924 4.210417 16.368038 58.181694 64.928165
std 5276.548446 27960.200461 4.656964 6.756961 7.047744 6.949953 6.139669 5.585481e+06 21351.714095 0.378861 ... 17.318157 20.775812 17.738069 16.996925 16.613665 18.038125 12.410438 17.136497 13.326083 11.934059
min 0.000000 768.000000 16.000000 156.000000 50.000000 48.000000 49.000000 0.000000e+00 0.000000 1.000000 ... 9.000000 3.000000 1.000000 1.000000 1.000000 1.000000 0.000000 1.000000 14.000000 19.000000
25% 4569.250000 204445.500000 22.000000 177.000000 70.000000 62.000000 67.000000 3.250000e+05 1000.000000 1.000000 ... 44.000000 25.000000 8.000000 8.000000 8.000000 8.000000 0.000000 8.000000 48.000000 58.000000
50% 9138.500000 226165.000000 25.000000 181.000000 75.000000 66.000000 71.000000 7.000000e+05 3000.000000 1.000000 ... 58.000000 52.000000 11.000000 11.000000 11.000000 11.000000 0.000000 11.000000 59.000000 66.000000
75% 13707.750000 240795.750000 29.000000 186.000000 80.000000 71.000000 75.000000 2.100000e+06 8000.000000 1.000000 ... 69.000000 64.000000 14.000000 14.000000 14.000000 14.000000 0.000000 14.000000 68.000000 73.000000
max 18277.000000 252905.000000 42.000000 205.000000 110.000000 94.000000 95.000000 1.055000e+08 565000.000000 5.000000 ... 95.000000 92.000000 90.000000 92.000000 93.000000 92.000000 65.000000 91.000000 95.000000 95.000000

8 rows × 81 columns

In [ ]:
 
In [9]:
##Age wise analysis
fig, ax = plt.subplots(1,2,figsize=(18,6))

sns.distplot(ax=ax[0], a=df_PES_Visualisation.age, kde=False,color="k")
sns.scatterplot(y='overall', x='age', data=df_PES_Visualisation, ax=ax[1], color="g")

ax[0].set_title("Age Distribution in PES 2020")
ax[1].set_title("Overall Ranking distribution for Player's age")


for i in range(2):
    ax[i].set_ylabel("Number")
In [7]:
#Univariate distribution of Observations using Distplot & Fitness of player
fig, ax = plt.subplots(1,3,figsize=(18,6))

sns.distplot(ax=ax[0], a=df_PES_Visualisation.height_cm, kde=False,color="k")
sns.distplot(ax=ax[1], a=df_PES_Visualisation.weight_kg, kde=False,color="k")
sns.regplot(y="height_cm", x="weight_kg", data=df_PES_Visualisation, ax=ax[2],color="b")

ax[0].set_title("Height in CM Distribution in PES 2020")
ax[1].set_title("Weight in kg Distribution in PES 2020")
ax[2].set_title("Height and Weight Comparison for different players")


for i in range(3):
    ax[i].set_ylabel("Number")
In [10]:
#PLotting Players with Stamina,Strength and Physic greater than 85

#Defining function
def scatter3D(x , y , z , name_of_the_player , xlabel , ylabel , zlabel , plot_title):
    zoom_camera = dict(up=dict(x=0, y=0, z=1),center=dict(x=0, y=0, z=0),eye=dict(x=2, y=2, z=0.1))
    
    plot = go.Scatter3d(
        x = x,
        y = y,
        z = z,
        mode = 'markers',
        text  = name_of_the_player,
        marker = dict(
            size = 12,
            color = z,
            colorscale = 'sunset',
            showscale = True,
            line = dict(
                color = 'rgba(225 , 225 , 225 , 0.14)',
                width = 0.5
            ),
            opacity = 0.8
        )
    )
    
    layout = go.Layout(
        title = plot_title,
        scene = dict(
            camera = zoom_camera,
            xaxis = dict(title  = xlabel),
            yaxis = dict(title  = ylabel),
            zaxis = dict(title  = zlabel)
        )
    )
    data = [plot]
    fig = go.Figure(data = data , layout = layout)
    py.offline.iplot(fig)

#Function Call
scatter3D(df_PES_Visualisation['stamina'].where(df_PES_Visualisation['stamina'] > 85),
          df_PES_Visualisation['strength'].where(df_PES_Visualisation['strength'] > 85),
          df_PES_Visualisation['physic'].where(df_PES_Visualisation['physic'] > 85),
          df_PES_Visualisation['short_name'],
         'Stamina' , 
         'Strength',
         'Physic',
         'Players with Stamina,Strength and Physic greater than 85')  
8686.58787.58888.58989.590Players with Stamina,Strength and Physic greater than 85
In [11]:
#Wage and Value of Player

sns.regplot(y='value_eur', x='wage_eur', data=df_PES_Visualisation,color="b", label = 'Wage and Value Comparison for different players')
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x247dc49fd00>
In [12]:
#Analysis of players Wage by their work_rate  and playing position
fig, ax = plt.subplots(2,1,figsize=(18,18))
ax = ax.ravel()

##Remove
sns.barplot(data=df_PES_Visualisation, y="wage_eur", x="team_position", ax=ax[0])
##

sns.barplot(data=df_PES_Visualisation, y="work_rate", x="wage_eur", ax=ax[1])

ax[0].set_title("Wage by Player's Position")
ax[1].set_title("Wage by Work rate")


fig.tight_layout()
In [13]:
#Different Attributes of Player in Relation with Overall

fig, ax = plt.subplots(2,3,figsize=(18,12))
ax = ax.ravel()

sns.scatterplot(y="overall", x="potential", data=df_PES_Visualisation, color="brown", ax=ax[0])
sns.scatterplot(y='overall', x='physic', data=df_PES_Visualisation, color="r", ax=ax[1])
sns.scatterplot(y="overall", x="shooting", data=df_PES_Visualisation, color="black", ax=ax[2])
sns.scatterplot(y='overall', x='passing', data=df_PES_Visualisation, color="g", ax=ax[3])
sns.scatterplot(y="overall", x="dribbling", data=df_PES_Visualisation, color="b", ax=ax[4])
sns.scatterplot(y='overall', x='defending', data=df_PES_Visualisation, color="y", ax=ax[5])


ax[0].set_title("Potential vs Overall")
ax[1].set_title("Physic vs Overall")
ax[2].set_title("Shooting vs Overall")
ax[3].set_title("Passing vs Overall")
ax[4].set_title("Dribbling vs Overall")
ax[5].set_title("Defending vs Overall")

fig.suptitle("Different Attributes of Player in Relation with Overall", size=20)
Out[13]:
Text(0.5, 0.98, 'Different Attributes of Player in Relation with Overall')
In [13]:
#Distribution of overall rating

plt.figure(figsize=(8,8))
sns.distplot( a=df_PES_Visualisation.overall, kde=False)
plt.title("Distribution of Overall Rating")
plt.ylabel("frequency")
Out[13]:
Text(0, 0.5, 'frequency')
In [14]:
##Comparison of Defending rating for different player positions

def plot(x  , y  , data , rows , cols):
        color_used = []
        n = 0
        for feature in y:
            
            for i in range(1000):
                colour = 'lightblue'
                if colour not in color_used:
                    color_used.append(colour)
                    break
    
            n += 1 
            plt.subplot(rows , cols , n)
            plt.subplots_adjust(hspace = 0.5 , wspace = 0.5)
            sns.regplot(x  = x , y = feature , data = data ,color = colour)
            
vals = ['st' , 'cam' , 'cb']
plt.figure(1 , figsize = (15 , 6))
plot(x = 'defending' , y = vals , data = df_PES_Visualisation , 
                         rows = 1 , cols = 3)
plt.show()
In [15]:
##Top 10 Nations

labels = ['England', 'Germany', 'Spain', 'France', 'Argentina','Brazil','Italy','Columbia','Japan','Netherlands']

sizes = df_PES_Visualisation['nationality'].value_counts().head(10)
colors = plt.cm.copper(np.linspace(0, 1, 5))
plt.rcParams['figure.figsize'] = (9, 9)
plt.pie(sizes, colors = colors, shadow = True,labels=labels)
plt.title('Top 10 Nations consisting maximum number of Total PES Players', fontsize = 20)
plt.legend()
plt.show()
In [16]:
#Distribution of wage for top 10 players
plt.rcParams['figure.figsize'] = (15, 10)
df_best_players=df_PES_Visualisation.sort_values(by="overall",ascending=False).head(10)
sns.barplot(data=df_best_players,y="wage_eur",x="short_name" ,color = 'lightblue')
plt.xlabel('Name of the Players', fontsize = 12)
plt.ylabel('Wage Range of Players', fontsize = 12)
plt.title('Wage Distribution of top 10 Rated Players', fontsize = 15)
plt.show()
In [17]:
#Percentage of players in different attacking role

attacker = ['RW', 'LW', 'ST', 'CF', 'LS', 'RS', 'RF', 'LF'] # List of attacker postions

sample = df_PES_Visualisation.query('team_position in @attacker')   #query to select attackers from team postion column as specified in team postion list 

#Creating pie type object using pie() method
figure_Attacker = px.pie(sample, names='team_position',  # Using pie method of plotly to visualize
             color_discrete_sequence=px.colors.sequential.Plasma_r, #specifying color gradient sequence for pie
             title='Percentage of players in Attacker Role')  # Title for the plot

figure_Attacker.update_traces(textposition='inside', textinfo='percent+label') # specifying textpostion and type of text i.e. % and a lable

#specifying Layout of the plot using paper_bgcolor plot_bgcolor font
figure_Attacker.update_layout(paper_bgcolor='rgba(0,0,0,0)',  #specifying backgroud color for paper
                  plot_bgcolor='rgba(0,0,0,0)',  #specifying backgroud color for plot
                  font=dict(family='Cambria, monospace', size=12, color='darkorange'))  #specifying font size and color of the text

figure_Attacker.show()
ST37.4%LS15.9%RS15.9%LW13.2%RW13.2%RF1.55%LF1.55%CF1.14%
STLSRSLWRWRFLFCFPercentage of players in Attacker Role
In [18]:
##Percentage of players in different Midfielder roles

midfielder = ['CAM', 'RCM', 'CDM', 'LDM', 'RM', 'LCM', 'LM', 'RDM', 'RAM','CM', 'LAM']    # List of Midfielder postions

sample = df_PES_Visualisation.query('team_position in @midfielder')    #query to select Midfielders from team postion column as specified in team postion list 

#Creating pie type object using pie() method
figure_midfielder = px.pie(sample, names='team_position',   # Using pie method of plotly to visualize
             color_discrete_sequence=px.colors.sequential.Viridis_r, #specifying color gradient sequence for pie
             title='Percentage of players in Different Midfielder Role')  # Title for the plot
figure_midfielder.update_traces(textposition='inside', textinfo='percent+label')  # specifying textpostion and type of text i.e. % and a lable

#specifying Layout of the plot using paper_bgcolor plot_bgcolor font
figure_midfielder.update_layout(paper_bgcolor='rgba(0,0,0,0)', #specifying backgroud color for paper
                  plot_bgcolor='rgba(0,0,0,0)',      #specifying backgroud color for plot
                  font=dict(family='Cambria, monospace', size=14, color='blue')) #specifying font size and color of the text
figure_midfielder.show()
RCM15.1%LCM15.1%RM14.7%LM14.6%CAM11.4%RDM8.97%LDM8.9%CDM6.66%CM2.8%RAM0.846%LAM0.846%
RCMLCMRMLMCAMRDMLDMCDMCMRAMLAMPercentage of players in Different Midfielder Role
In [19]:
# ##Percentage of players in different Defender roles

defender = ['LCB', 'RCB', 'LB', 'RB', 'CB', 'RWB', 'LWB'] # List of defender postions

query_plot = df_PES_Visualisation.query('team_position in @defender')  #query to select defedenders from team postion column as specified in team postion list

#Creating pie type object using pie() method
figure_defender = px.pie(query_plot, names='team_position', # Using pie method of plotly to visualize
             color_discrete_sequence=px.colors.sequential.Magma_r, #specifying color gradient sequence for pie 
             title='Percentage of Players in Different Defender Role')     # Title for the plot

figure_defender.update_traces(textposition='inside', textinfo='percent+label') # specifying textpostion and type of text i.e. % and a lable

#specifying Layout of the plot using paper_bgcolor plot_bgcolor font
figure_defender.update_layout(        
                  paper_bgcolor='rgba(0,0,0,0)',#specifying backgroud color for paper
                  plot_bgcolor='rgba(0,0,0,0)', #specifying backgroud color for plot 
                  font=dict(family='Cambria, monospace', size=14, color='firebrick') #specifying font size and color of the text
                  )
figure_defender.show()
LCB24.8%RCB24.8%LB21.1%RB21.1%CB3.77%RWB2.18%LWB2.18%
LCBRCBLBRBCBRWBLWBPercentage of Players in Different Defender Role
In [20]:
#Selecting the best team from the user defined player positions

def best_formation_selection(position):  # creted function definition 
    df_PES_Visualisation_copy = df_PES_Visualisation.copy()
    team = []
    
    for i in position:   # for loop to iterate over positions specified in function call 
        
        # computing the max player count for player position and appending it to a team
        team.append([i,df_PES_Visualisation_copy.loc[[df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].idxmax()]]['short_name'].to_string(index = False), df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].max()])
        
        # Dropping the players appended to team so that they will be not be selected again in team
        df_PES_Visualisation_copy.drop(df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].idxmax(), inplace = True)
    
        #returning team by reshaping it to array of 11*3  
    return pd.DataFrame(np.array(team).reshape(11,3), columns = ['Position', 'Player', 'Overall'])
    

# 4-3-3         4 Defenders  3 midfielders 3 forwrds 1 goalkeeper
formation_433 = ['GK', 'LB', 'CB', 'CB', 'RB', 'LM', 'CDM', 'RM', 'LW', 'ST', 'RW']
print ('Formation: 4-3-3')
#Function Call
print (best_formation_selection(formation_433))
Formation: 4-3-3
   Position              Player Overall
0        GK            J. Oblak      91
1        LB          Jordi Alba      87
2        CB          S. de Vrij      84
3        CB           F. Acerbi      83
4        RB          J. Kimmich      86
5        LM       P. Aubameyang      88
6       CDM     Sergio Busquets      89
7        RM          C. Eriksen      88
8        LW   Cristiano Ronaldo      93
9        ST             H. Kane      89
10       RW            L. Messi      94
In [21]:
# 3-5-2   2 forwards 5 midfielders 3 defenders 1 Goalkeeper
formation_352 = ['GK', 'LWB', 'CB', 'RWB', 'LM', 'CDM', 'CAM', 'CM', 'RM', 'LW', 'RW']
print ('3-5-2')
print (best_formation_selection(formation_352))
3-5-2
   Position              Player Overall
0        GK            J. Oblak      91
1       LWB      M. Halstenberg      80
2        CB          S. de Vrij      84
3       RWB      P. Kadeřábek      80
4        LM       P. Aubameyang      88
5       CDM     Sergio Busquets      89
6       CAM           Neymar Jr      92
7        CM        Luis Alberto      84
8        RM          C. Eriksen      88
9        LW   Cristiano Ronaldo      93
10       RW            L. Messi      94
In [22]:
##Player's international reputation
df_PES_Visualisation[df_PES_Visualisation['international_reputation'] == 5][['short_name', 'age', 'club', 'nationality','overall']].style.background_gradient('bwr')
Out[22]:
short_name age club nationality overall
0 L. Messi 32 FC Barcelona Argentina 94
1 Cristiano Ronaldo 34 Juventus Portugal 93
2 Neymar Jr 27 Paris Saint-Germain Brazil 92
19 L. Suárez 32 FC Barcelona Uruguay 89
31 M. Neuer 33 FC Bayern München Germany 88
96 Z. Ibrahimović 37 LA Galaxy Sweden 85
In [ ]:
 
In [23]:
##Clubwise Best team formation

def club_best_formation_selection(position,club_name):  # created function definition 
    df_PES_Visualisation_copy = df_PES_Visualisation.copy()
    team = []
    
    df_PES_Visualisation_copy = df_PES_Visualisation_copy.loc[df_PES_Visualisation_copy['club'] == club_name]                                                 
    
    for i in position:   # for loop to iterate over positions specified in function call 
        
        # computing the max player count for player position and appending it to a team
        team.append([i,df_PES_Visualisation_copy.loc[[df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].idxmax()]]['short_name'].to_string(index = False), 
                     df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].max()])
        
        # Dropping the players appended to team so that they will be not be selected again in team
        df_PES_Visualisation_copy.drop(df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].idxmax(), inplace = True)
    
        #returning team by reshaping it to array of 11*3
    print("Team Selection Based on Club : ",club_name)
    return pd.DataFrame(np.array(team).reshape(11,3), columns = ['Position', 'Player', 'Overall'])
    

# 4-3-3         4 Defenders  3 midfielders 3 forwrds 1 goalkeeper
formation_433 = ['GK', 'LB', 'RB', 'LCB', 'RCB', 'LCM', 'CDM', 'RCM', 'LW', 'CF', 'RW']
print ('4-3-3')
#Function Call

print (club_best_formation_selection(formation_433,'Real Madrid'))
4-3-3
Team Selection Based on Club :  Real Madrid
   Position          Player Overall
0        GK     T. Courtois      88
1        LB         Marcelo      85
2        RB        Carvajal      85
3       LCB    Sergio Ramos      89
4       RCB       R. Varane      85
5       LCM        T. Kroos      88
6       CDM        Casemiro      87
7       RCM      L. Modrić      90
8        LW       E. Hazard      91
9        CF      K. Benzema      87
10       RW   Vinícius Jr.      79
In [ ]:
 
In [24]:
##Nationwise best team selection

def nation_best_formation_selection(position,nation_name):  # created function definition 
    df_PES_Visualisation_copy = df_PES_Visualisation.copy()
    team = []
    
    df_PES_Visualisation_copy = df_PES_Visualisation_copy.loc[df_PES_Visualisation_copy['nationality'] == nation_name]                                                 
    
    for i in position:   # for loop to iterate over positions specified in function call 
        
        # computing the max player count for player position and appending it to a team
        team.append([i,df_PES_Visualisation_copy.loc[[df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].idxmax()]]['short_name'].to_string(index = False), df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].max()])
        
        # Dropping the players appended to team so that they will be not be selected again in team
        df_PES_Visualisation_copy.drop(df_PES_Visualisation_copy[df_PES_Visualisation_copy['team_position'] == i]['overall'].idxmax(), inplace = True)
    
        #returning team by reshaping it to array of 11*3 
    print("Team Selection Based on Nation : ",nation_name)
    return pd.DataFrame(np.array(team).reshape(11,3), columns = ['Position', 'Player', 'Overall'])
    

# 4-3-3         4 Defenders  3 midfielders 3 forwrds 1 goalkeeper
formation_433 = ['GK', 'LB', 'RB', 'LCB', 'RCB', 'LCM', 'CDM', 'RCM', 'LW', 'ST', 'RW']
print ('4-3-3')
#Function Call

print (nation_best_formation_selection(formation_433,'Spain'))
4-3-3
Team Selection Based on Nation :  Spain
   Position            Player Overall
0        GK            De Gea      89
1        LB        Jordi Alba      87
2        RB          Carvajal      85
3       LCB      Sergio Ramos      89
4       RCB            Piqué      88
5       LCM       David Silva      88
6       CDM   Sergio Busquets      89
7       RCM            Parejo      86
8        LW         Oyarzabal      82
9        ST    Borja Iglesias      83
10       RW   Iñaki Williams      82
In [ ]: